library(tidyverse)
library(ggplot2)
library(plotly)
library(DT)

Exercise 1

# Load the tab-delimited genotype table; the first row holds column names.
SNPs <- read.table("23andMe_complete.txt", header = TRUE, sep = "\t")

# Turn chromosome into an ordered factor (1-22, then X, Y, MT) so the bars
# are drawn in that order rather than alphabetically.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Bar chart of the number of rows (SNPs) per chromosome. The title is part
# of the stored plot object, so printing `d` shows the titled figure; the
# original added the title to a throwaway copy and then printed the
# untitled `d` a second time.
d <- ggplot(data = SNPs) +
  geom_bar(mapping = aes(x = chromosome)) +
  ggtitle("Number of SNPs per chromosome")

d

Exercise 2

# Genotype groups shown in the legend, in legend order.
dinuc <- c("AA", "AC", "AT", "AG", "CC", "CG", "CT", "GG", "GT", "TT")
mono <- c("A", "T", "G", "C")
others <- c("DD", "II", "DI", "D", "I", "--")

# Named colour lookup keyed by genotype: two-letter calls are red,
# single-letter calls yellow, D/I calls green, and "--" black.
colors <- c(
  setNames(rep("red", length(dinuc)), dinuc),
  setNames(rep("yellow", length(mono)), mono),
  setNames(rep("green", 5), c("DD", "II", "D", "DI", "I")),
  "--" = "black"
)

# Stacked bar chart of SNP counts per chromosome, filled by genotype using
# the manual colour lookup above.
d <- ggplot(data = SNPs) +
  geom_bar(mapping = aes(x = chromosome, fill = genotype)) +
  labs(
    title = "Number of SNPs per Chromosome",
    x = "Chromosome Number",
    y = "SNP count"
  ) +
  scale_fill_manual(
    values = colors,
    name = "Nucleotides",
    breaks = c(dinuc, mono, others)
  )
d

Exercise 3

Nucleotide counts per chromosome

Nucleotide counts per chromosome

Exercise 4

# One panel per genotype (two panels per row), each showing the count of
# rows per chromosome. The x-axis text is shrunk so all chromosome labels
# fit inside the small panels.
d <- ggplot(data = SNPs) +
  geom_bar(
    mapping = aes(x = chromosome, fill = genotype),
    position = "dodge"
  ) +
  facet_wrap(vars(genotype), ncol = 2) +
  labs(
    title = "Number of Nucleotides per Chromosome",
    x = "Chromosome Number",
    y = "Number of Nucleotides"
  ) +
  theme(axis.text.x = element_text(angle = 0, size = 4))
d

Exercise 5

# Rebuild the per-genotype faceted bar chart (two panels per row, small
# x-axis labels) and hand it to plotly for an interactive version.
d <- ggplot(data = SNPs) +
  geom_bar(
    mapping = aes(x = chromosome, fill = genotype),
    position = "dodge"
  ) +
  facet_wrap(vars(genotype), ncol = 2) +
  labs(
    title = "Number of Nucleotides per Chromosome",
    x = "Chromosome Number",
    y = "Number of Nucleotides"
  ) +
  theme(axis.text.x = element_text(angle = 0, size = 4))
ggplotly(d)

Exercise 6

# Keep only the rows on chromosome Y. which() drops NA comparisons, so rows
# with a missing chromosome are excluded, and the original row names are
# retained for display.
on_y <- which(SNPs$chromosome == "Y")
SNP_table_Y <- SNPs[on_y, , drop = FALSE]

# Render the subset as an interactive DT table.
datatable(SNP_table_Y)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html